#!/bin/bash
# ODS拉链表Clue

HIVE_HOME=/usr/bin/hive
SQOOP_HOME=/usr/bin/sqoop

# 需求：能够手动传入参数指定采集的日期。如果不传入参数要求能够自动的识别前一天的日期
if [ $# -eq 0 ]
then
    DATESTR=`date -d "-1 day" +%Y-%m-%d`
else
    DATESTR=$1
fi

# 一、重建customer_clue_update增量表，每次使用都要重建，避免数据重复问题
${HIVE_HOME} -e "
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
DROP TABLE itcast_ods.customer_clue_update;
CREATE TABLE IF NOT EXISTS itcast_ods.customer_clue_update (
  id int COMMENT 'customer_clue_id',
  create_date_time STRING COMMENT '创建时间',
  update_date_time STRING COMMENT '最后更新时间',
  deleted STRING COMMENT '是否被删除（禁用）',
  customer_id int COMMENT '客户id',
  customer_relationship_id int COMMENT '客户关系id',
  session_id STRING COMMENT '七陌会话id',
  sid STRING COMMENT '访客id',
  status STRING COMMENT '状态（undeal待领取 deal 已领取 finish 已关闭 changePeer 已流转）',
  users STRING COMMENT '所属坐席',
  create_time STRING COMMENT '七陌创建时间',
  platform STRING COMMENT '平台来源 （pc-网站咨询|wap-wap咨询|sdk-app咨询|weixin-微信咨询）',
  s_name STRING COMMENT '用户名称',
  seo_source STRING COMMENT '搜索来源',
  seo_keywords STRING COMMENT '关键字',
  ip STRING COMMENT 'IP地址',
  referrer STRING COMMENT '上级来源页面',
  from_url STRING COMMENT '会话来源页面',
  landing_page_url STRING COMMENT '访客着陆页面',
  url_title STRING COMMENT '咨询页面title',
  to_peer STRING COMMENT '所属技能组',
  manual_time STRING COMMENT '人工开始时间',
  begin_time STRING COMMENT '坐席领取时间 ',
  reply_msg_count int COMMENT '客服回复消息数',
  total_msg_count int COMMENT '消息总数',
  msg_count int COMMENT '客户发送消息数',
  comment STRING COMMENT '备注',
  finish_reason STRING COMMENT '结束类型',
  finish_user STRING COMMENT '结束坐席',
  end_time STRING COMMENT '会话结束时间',
  platform_description STRING COMMENT '客户平台信息',
  browser_name STRING COMMENT '浏览器名称',
  os_info STRING COMMENT '系统名称',
  area STRING COMMENT '区域',
  country STRING COMMENT '所在国家',
  province STRING COMMENT '省',
  city STRING COMMENT '城市',
  creator int COMMENT '创建人',
  name STRING COMMENT '客户姓名',
  idcard STRING COMMENT '身份证号',
  phone STRING COMMENT '手机号',
  itcast_school_id int COMMENT '校区Id',
  itcast_school STRING COMMENT '校区',
  itcast_subject_id int COMMENT '学科Id',
  itcast_subject STRING COMMENT '学科',
  wechat STRING COMMENT '微信',
  qq STRING COMMENT 'qq号',
  email STRING COMMENT '邮箱',
  gender STRING COMMENT '性别',
  level STRING COMMENT '客户级别',
  origin_type STRING COMMENT '数据来源渠道',
  information_way STRING COMMENT '资讯方式',
  working_years STRING COMMENT '开始工作时间',
  technical_directions STRING COMMENT '技术方向',
  customer_state STRING COMMENT '当前客户状态',
  valid STRING COMMENT '该线索是否是网资有效线索',
  anticipat_signup_date STRING COMMENT '预计报名时间',
  clue_state STRING COMMENT '线索状态',
  scrm_department_id int COMMENT 'SCRM内部部门id',
  superior_url STRING COMMENT '诸葛获取上级页面URL',
  superior_source STRING COMMENT '诸葛获取上级页面URL标题',
  landing_url STRING COMMENT '诸葛获取着陆页面URL',
  landing_source STRING COMMENT '诸葛获取着陆页面URL来源',
  info_url STRING COMMENT '诸葛获取留咨页URL',
  info_source STRING COMMENT '诸葛获取留咨页URL标题',
  origin_channel STRING COMMENT '投放渠道',
  course_id int COMMENT '课程编号',
  course_name STRING COMMENT '课程名称',
  zhuge_session_id STRING COMMENT 'zhuge会话id',
  is_repeat int COMMENT '是否重复线索(手机号维度) 0:正常 1：重复',
  tenant int COMMENT '租户id',
  activity_id STRING COMMENT '活动id',
  activity_name STRING COMMENT '活动名称',
  follow_type int COMMENT '分配类型，0-自动分配，1-手动分配，2-自动转移，3-手动单个转移，4-手动批量转移，5-公海领取',
  shunt_mode_id int COMMENT '匹配到的技能组id',
  shunt_employee_group_id int COMMENT '所属分流员工组',
  ends_time STRING COMMENT '有效时间')
comment '客户关系表'
PARTITIONED BY(starts_time STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
stored as orc
TBLPROPERTIES ('orc.compress'='ZLIB');
"

# 二、抽取昨天新增和更新的数据
${SQOOP_HOME} import \
--connect jdbc:mysql://192.168.10.150:3306/scrm \
--username root \
--password 123456 \
--query 'select id,
    create_date_time,
    update_date_time,
    deleted,
    customer_id,
    customer_relationship_id,
    session_id,
    sid,
    status,
    user,
    create_time, platform,
    s_name, seo_source,
    seo_keywords,
    ip, referrer,
    from_url,
    landing_page_url,
    url_title,
    to_peer,
    manual_time,
    begin_time,
    reply_msg_count,
    total_msg_count,
    msg_count,
    comment,
    finish_reason,
    finish_user,
    end_time,
    platform_description,
    browser_name,
    os_info,
    area,
    country,
    province,
    city,
    creator,
    name,
    idcard,
    phone,
    itcast_school_id,
    itcast_school,
    itcast_subject_id,
    itcast_subject,
    wechat,
    qq,
    email,
    gender,
    level,
    origin_type,
    information_way,
    working_years,
    technical_directions,
    customer_state,
    valid,
    anticipat_signup_date,
    clue_state,
    scrm_department_id,
    superior_url,
    superior_source,
    landing_url,
    landing_source,
    info_url,
    info_source,
    origin_channel,
    course_id,
    course_name,
    zhuge_session_id,
    is_repeat,
    tenant,
    activity_id,
    activity_name,
    follow_type,
    shunt_mode_id,
    shunt_employee_group_id,
    current_date() as starts_time,
    '9999-12-31' as ends_time
from customer_clue
where (
        create_date_time >= '${DATESTR} 00:00:00'
        and
        create_date_time <= '${DATESTR} 23:59:59')
   or (
        update_date_time >= '${DATESTR} 00:00:00'
        and
        update_date_time <= '${DATESTR} 23:59:59') $CONDITIONS' \
--hcatalog-database itcast_ods \
--hcatalog-table customer_clue_update \
--hive-partition-key starts_time \
--hive-partition-value ${DATESTR} \
-m 10 \
--split-by id

# 三、重建customer_relationship_tmp临时表
${HIVE_HOME} -e "
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
DROP TABLE itcast_ods.customer_clue_tmp;
CREATE TABLE IF NOT EXISTS itcast_ods.customer_clue_tmp (
  id int COMMENT 'customer_clue_id',
  create_date_time STRING COMMENT '创建时间',
  update_date_time STRING COMMENT '最后更新时间',
  deleted STRING COMMENT '是否被删除（禁用）',
  customer_id int COMMENT '客户id',
  customer_relationship_id int COMMENT '客户关系id',
  session_id STRING COMMENT '七陌会话id',
  sid STRING COMMENT '访客id',
  status STRING COMMENT '状态（undeal待领取 deal 已领取 finish 已关闭 changePeer 已流转）',
  users STRING COMMENT '所属坐席',
  create_time STRING COMMENT '七陌创建时间',
  platform STRING COMMENT '平台来源 （pc-网站咨询|wap-wap咨询|sdk-app咨询|weixin-微信咨询）',
  s_name STRING COMMENT '用户名称',
  seo_source STRING COMMENT '搜索来源',
  seo_keywords STRING COMMENT '关键字',
  ip STRING COMMENT 'IP地址',
  referrer STRING COMMENT '上级来源页面',
  from_url STRING COMMENT '会话来源页面',
  landing_page_url STRING COMMENT '访客着陆页面',
  url_title STRING COMMENT '咨询页面title',
  to_peer STRING COMMENT '所属技能组',
  manual_time STRING COMMENT '人工开始时间',
  begin_time STRING COMMENT '坐席领取时间 ',
  reply_msg_count int COMMENT '客服回复消息数',
  total_msg_count int COMMENT '消息总数',
  msg_count int COMMENT '客户发送消息数',
  comment STRING COMMENT '备注',
  finish_reason STRING COMMENT '结束类型',
  finish_user STRING COMMENT '结束坐席',
  end_time STRING COMMENT '会话结束时间',
  platform_description STRING COMMENT '客户平台信息',
  browser_name STRING COMMENT '浏览器名称',
  os_info STRING COMMENT '系统名称',
  area STRING COMMENT '区域',
  country STRING COMMENT '所在国家',
  province STRING COMMENT '省',
  city STRING COMMENT '城市',
  creator int COMMENT '创建人',
  name STRING COMMENT '客户姓名',
  idcard STRING COMMENT '身份证号',
  phone STRING COMMENT '手机号',
  itcast_school_id int COMMENT '校区Id',
  itcast_school STRING COMMENT '校区',
  itcast_subject_id int COMMENT '学科Id',
  itcast_subject STRING COMMENT '学科',
  wechat STRING COMMENT '微信',
  qq STRING COMMENT 'qq号',
  email STRING COMMENT '邮箱',
  gender STRING COMMENT '性别',
  level STRING COMMENT '客户级别',
  origin_type STRING COMMENT '数据来源渠道',
  information_way STRING COMMENT '资讯方式',
  working_years STRING COMMENT '开始工作时间',
  technical_directions STRING COMMENT '技术方向',
  customer_state STRING COMMENT '当前客户状态',
  valid STRING COMMENT '该线索是否是网资有效线索',
  anticipat_signup_date STRING COMMENT '预计报名时间',
  clue_state STRING COMMENT '线索状态',
  scrm_department_id int COMMENT 'SCRM内部部门id',
  superior_url STRING COMMENT '诸葛获取上级页面URL',
  superior_source STRING COMMENT '诸葛获取上级页面URL标题',
  landing_url STRING COMMENT '诸葛获取着陆页面URL',
  landing_source STRING COMMENT '诸葛获取着陆页面URL来源',
  info_url STRING COMMENT '诸葛获取留咨页URL',
  info_source STRING COMMENT '诸葛获取留咨页URL标题',
  origin_channel STRING COMMENT '投放渠道',
  course_id int COMMENT '课程编号',
  course_name STRING COMMENT '课程名称',
  zhuge_session_id STRING COMMENT 'zhuge会话id',
  is_repeat int COMMENT '是否重复线索(手机号维度) 0:正常 1：重复',
  tenant int COMMENT '租户id',
  activity_id STRING COMMENT '活动id',
  activity_name STRING COMMENT '活动名称',
  follow_type int COMMENT '分配类型，0-自动分配，1-手动分配，2-自动转移，3-手动单个转移，4-手动批量转移，5-公海领取',
  shunt_mode_id int COMMENT '匹配到的技能组id',
  shunt_employee_group_id int COMMENT '所属分流员工组',
  ends_time STRING COMMENT '有效时间')
comment '客户关系表'
PARTITIONED BY(starts_time STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
stored as orc
TBLPROPERTIES ('orc.compress'='ZLIB');
"

# 四、合并增量数据与历史数据并写到tmp表中
${HIVE_HOME} -e "
insert overwrite table itcast_ods.customer_clue_tmp partition (starts_time)
select *
from (
         select id,
                create_date_time,
                update_date_time,
                deleted,
                customer_id,
                customer_relationship_id,
                session_id,
                sid,
                status,
                user,
                create_time,
                platform,
                s_name,
                seo_source,
                seo_keywords,
                ip,
                referrer,
                from_url,
                landing_page_url,
                url_title,
                to_peer,
                manual_time,
                begin_time,
                reply_msg_count,
                total_msg_count,
                msg_count,
                comment,
                finish_reason,
                finish_user,
                ends_time,
                platform_description,
                browser_name,
                os_info,
                area,
                country,
                province,
                city,
                creator,
                name,
                idcard,
                phone,
                itcast_school_id,
                itcast_school,
                itcast_subject_id,
                itcast_subject,
                wechat,
                qq,
                email,
                gender,
                level,
                origin_type,
                information_way,
                working_years,
                technical_directions,
                customer_state,
                valid,
                anticipat_signup_date,
                clue_state,
                scrm_department_id,
                superior_url,
                superior_source,
                landing_url,
                landing_source,
                info_url,
                info_source,
                origin_channel,
                course_id,
                course_name,
                zhuge_session_id,
                is_repeat,
                tenant,
                activity_id,
                activity_name,
                follow_type,
                shunt_mode_id,
                shunt_employee_group_id,
                '9999-12-31'    ends_time,
                '${DATESTR}' as starts_time
         from itcast_ods.customer_clue_update
         where starts_time = '${DATESTR}'

         union all

         select rs.id,
                rs.create_date_time,
                rs.update_date_time,
                rs.deleted,
                rs.customer_id,
                rs.customer_relationship_id,
                rs.session_id,
                rs.sid,
                rs.status,
                rs.user,
                rs.create_time,
                rs.platform,
                rs.s_name,
                rs.seo_source,
                rs.seo_keywords,
                rs.ip,
                rs.referrer,
                rs.from_url,
                rs.landing_page_url,
                rs.url_title,
                rs.to_peer,
                rs.manual_time,
                rs.begin_time,
                rs.reply_msg_count,
                rs.total_msg_count,
                rs.msg_count,
                rs.comment,
                rs.finish_reason,
                rs.finish_user,
                rs.ends_time,
                rs.platform_description,
                rs.browser_name,
                rs.os_info,
                rs.area,
                rs.country,
                rs.province,
                rs.city,
                rs.creator,
                rs.name,
                rs.idcard,
                rs.phone,
                rs.itcast_school_id,
                rs.itcast_school,
                rs.itcast_subject_id,
                rs.itcast_subject,
                rs.wechat,
                rs.qq,
                rs.email,
                rs.gender,
                rs.level,
                rs.origin_type,
                rs.information_way,
                rs.working_years,
                rs.technical_directions,
                rs.customer_state,
                rs.valid,
                rs.anticipat_signup_date,
                rs.clue_state,
                rs.scrm_department_id,
                rs.superior_url,
                rs.superior_source,
                rs.landing_url,
                rs.landing_source,
                rs.info_url,
                rs.info_source,
                rs.origin_channel,
                rs.course_id,
                rs.course_name,
                rs.zhuge_session_id,
                rs.is_repeat,
                rs.tenant,
                rs.activity_id,
                rs.activity_name,
                rs.follow_type,
                rs.shunt_mode_id,
                rs.shunt_employee_group_id,
                if(up.id is not null, up.update_date_time, rs.ends_time) ends_time,
                rs.starts_time
         from itcast_ods.customer_clue rs
                  left join
              itcast_ods.customer_clue_update up
              on rs.id = up.id
     ) his
order by his.id, starts_time;
"

# 五、临时表覆盖到拉链表
${HIVE_HOME} -e "
--分区
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=10000;
set hive.exec.max.dynamic.partitions=100000;
set hive.exec.max.created.files=150000;
--hive压缩
set hive.exec.compress.intermediate=true;
set hive.exec.compress.output=true;
--写入时压缩生效
set hive.exec.orc.compression.strategy=COMPRESSION;
--分桶
set hive.enforce.bucketing=true;
set hive.enforce.sorting=true;
set hive.optimize.bucketmapjoin = true;
set hive.auto.convert.sortmerge.join=true;
set hive.auto.convert.sortmerge.join.noconditionaltask=true;
--覆盖写回数据
insert overwrite table itcast_ods.customer_clue partition (starts_time)
select * from itcast_ods.customer_clue_tmp;
"
